#!/usr/bin/env python3
#
# This script contains the actual logic for upgrading from an old
# version of Zulip to the new version.  upgrade-zulip-stage-3 is
# always run from the new version of Zulip, so any bug fixes take
# effect on the very next upgrade.
import argparse
import glob
import hashlib
import logging
import os
import re
import shutil
import subprocess
import sys
import time

# Disable stdio buffering so log lines appear immediately, even when
# output is piped (e.g. by upgrade-zulip wrappers).
os.environ["PYTHONUNBUFFERED"] = "y"

# Force a known locale.  Some packages on PyPI fail to install in some locales.
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"
os.environ["LANGUAGE"] = "C.UTF-8"

# Make the deployment root importable so scripts.lib.zulip_tools resolves
# regardless of the directory this script was invoked from.
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from scripts.lib.zulip_tools import (
    DEPLOYMENTS_DIR,
    assert_running_as_root,
    get_config,
    get_config_file,
    listening_publicly,
    parse_version_from,
    run_psql_as_postgres,
    start_arg_parser,
    su_to_zulip,
)

# Nearly everything below (apt, puppet, service management) requires root.
assert_running_as_root()

# Set a known, reliable PATH
os.environ["PATH"] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# Log timestamps in UTC, with a per-line prefix identifying this stage of
# the upgrade pipeline.
logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s upgrade-zulip-stage-3: %(message)s", level=logging.INFO)

# make sure we have appropriate file permissions
os.umask(0o22)

# Inherit the shared restart/start flags (e.g. --less-graceful, --skip-checks,
# --skip-client-reloads) so this script accepts the same options as
# restart-server does.
restart_parser = start_arg_parser(action="restart", add_help=False)

parser = argparse.ArgumentParser(parents=[restart_parser])
parser.add_argument("deploy_path", metavar="deploy_path", help="Path to deployment directory")
parser.add_argument(
    "--skip-restart",
    action="store_true",
    help="Configure, but do not restart into, the new version; aborts if any system-wide changes would happen.",
)
parser.add_argument("--skip-puppet", action="store_true", help="Skip doing puppet/apt upgrades.")
parser.add_argument("--skip-migrations", action="store_true", help="Skip doing migrations.")
parser.add_argument(
    "--skip-downgrade-check",
    action="store_true",
    help="Skip the safety check to prevent database downgrades.",
)
parser.add_argument(
    "--from-git", action="store_true", help="Upgrading from git, so run update-prod-static."
)
parser.add_argument(
    "--ignore-static-assets",
    action="store_true",
    help="Do not attempt to copy/manage static assets.",
)
parser.add_argument(
    "--skip-purge-old-deployments", action="store_true", help="Skip purging old deployments."
)
parser.add_argument(
    "--audit-fts-indexes", action="store_true", help="Audit and fix full text search indexes."
)
args = parser.parse_args()

# --skip-restart promises a configure-only run with no downtime; that is only
# safe if no migrations or puppet changes would be needed.  Rather than skip
# those checks, we clear the conflicting --skip-* flags so the pre-checks
# below run and abort if any system-wide change would be required.
if args.skip_restart:
    if args.less_graceful:
        logging.warning("Ignored --less-graceful; --skip-restart is always graceful.")
        args.less_graceful = False
    if args.skip_migrations:
        logging.warning(
            "Ignored --skip-migrations; all upgrades with --skip-restart asserts no migrations."
        )
        args.skip_migrations = False
    if args.skip_puppet:
        logging.warning(
            "Ignored --skip-puppet; all upgrades with --skip-restart asserts no puppet changes."
        )
        args.skip_puppet = False

# Puppet runs stop the server outright (see shutdown_server below), so
# gracefulness only matters when puppet is skipped.
if not args.skip_puppet and args.less_graceful:
    logging.warning("Ignored --less-graceful; all upgrades without --skip-puppet are ungraceful.")

# All relative paths below (./manage.py, ./scripts/...) are resolved against
# the new deployment's directory.
deploy_path = args.deploy_path
os.chdir(deploy_path)

config_file = get_config_file()

# IS_SERVER_UP tracks whether we believe Zulip's services are running;
# HAS_FILLED_CACHES tracks whether memcached has already been warmed, so we
# only pay that cost once per upgrade.
IS_SERVER_UP = True
HAS_FILLED_CACHES = False

if args.from_git:
    logging.info("Caching Zulip Git version...")
    subprocess.check_call(["./tools/cache-zulip-git-version"], preexec_fn=su_to_zulip)

# Import the new deployment's version string; this works because we already
# chdir'd into deploy_path above.
from version import ZULIP_VERSION as NEW_ZULIP_VERSION

old_version = parse_version_from(DEPLOYMENTS_DIR + "/current")
logging.info("Upgrading from %s to %s, in %s", old_version, NEW_ZULIP_VERSION, deploy_path)

# Check if rabbitmq port 25672 is listening on anything except 127.0.0.1
rabbitmq_dist_listen = listening_publicly(25672)
# Check the erlang magic cookie size
cookie_size: int | None = None
if os.path.exists("/var/lib/rabbitmq/.erlang.cookie"):
    with open("/var/lib/rabbitmq/.erlang.cookie") as cookie_fh:
        cookie_size = len(cookie_fh.readline())
else:
    logging.info("No RabbitMQ erlang cookie found, not auditing RabbitMQ security.")
# A full puppet run (plus service restarts) fixes both problems, so they are
# only fatal when the user asked us to skip those steps.
if (args.skip_restart or args.skip_puppet) and rabbitmq_dist_listen:
    logging.error(
        "RabbitMQ is publicly-accessible on %s; this is a security vulnerability!",
        ", ".join(rabbitmq_dist_listen),
    )
    issue = "issue"
    if cookie_size is not None and cookie_size == 20:
        # See the below comment -- this is used as a lightweight
        # signal for a cookie made with Erlang's bad randomizer.
        logging.error(
            "RabbitMQ erlang cookie is insecure; this is a critical security vulnerability!"
        )
        issue = "issues"
    logging.error(
        "To fix the above security %s, re-run the upgrade without --skip-puppet "
        "(which may be set in /etc/zulip/zulip.conf), in order to restart the "
        "necessary services.  Running zulip-puppet-apply by itself is not sufficient.",
        issue,
    )
    sys.exit(1)


# Set to True once we know migrations must run; fill_memcached_caches skips
# its work in that case, since post-migration data would invalidate the warm
# caches anyway (the restart is told to fill them instead).
migrations_needed = False


def fill_memcached_caches() -> None:
    """Warm the memcached caches, at most once per upgrade.

    Skipped entirely when migrations are pending, since the restart path
    fills caches itself after migrating (see the start_args handling below).
    """
    global HAS_FILLED_CACHES
    if not (HAS_FILLED_CACHES or migrations_needed):
        subprocess.check_call(
            ["./manage.py", "fill_memcached_caches", "--automated", "--skip-checks"],
            preexec_fn=su_to_zulip,
        )
        HAS_FILLED_CACHES = True


def shutdown_server(fill_caches: bool = True) -> None:
    """Stop the Zulip server (idempotently), warming caches first.

    Under --skip-restart this aborts the whole upgrade instead, since the
    caller has promised the server will not be touched.
    """
    global IS_SERVER_UP

    if args.skip_restart:
        logging.info("Upgrade would require shutting down Zulip -- aborting!")
        sys.exit(1)

    if fill_caches:
        fill_memcached_caches()

    if not IS_SERVER_UP:
        # Already stopped earlier in this run; nothing to do.
        return
    logging.info("Stopping Zulip...")
    subprocess.check_call(["./scripts/stop-server"], preexec_fn=su_to_zulip)
    IS_SERVER_UP = False


# Drop the obsolete tsearch_extras PostgreSQL extension and its package, if
# any version of it is still installed on this host.
if glob.glob("/usr/share/postgresql/*/extension/tsearch_extras.control"):
    # Remove legacy tsearch_extras package references
    run_psql_as_postgres(
        config_file=config_file,
        sql_query="DROP EXTENSION IF EXISTS tsearch_extras;",
    )
    subprocess.check_call(["apt-get", "remove", "-y", "postgresql-*-tsearch-extras"])

if not (args.skip_restart or args.skip_puppet):
    # We need to temporarily hold pgroonga, if installed -- upgrading
    # it without running the appropriate upgrade SQL can cause
    # PostgreSQL to crash
    if get_config(config_file, "machine", "pgroonga", False):
        subprocess.check_call(["apt-mark", "hold", "postgresql-*-pgdg-pgroonga"])
    logging.info("Upgrading system packages...")
    subprocess.check_call(["apt-get", "update"])
    subprocess.check_call(["apt-get", "-y", "--with-new-pkgs", "upgrade"])
    if get_config(config_file, "machine", "pgroonga", False):
        subprocess.check_call(["apt-mark", "unhold", "postgresql-*-pgdg-pgroonga"])

# To bootstrap zulip-puppet-apply, we need to install the system yaml
# package; new installs get this, but old installs may not have it.
if not os.path.exists("/usr/share/doc/python3-yaml"):
    logging.info("Installing system YAML package, for puppet...")
    subprocess.check_call(["apt-get", "install", "python3-yaml"])

if not os.path.exists(os.path.join(deploy_path, "zproject/prod_settings.py")):
    # This is normally done in unpack-zulip, but for upgrading from
    # zulip<1.4.0, we need to do it.  See discussion in commit 586b23637.
    os.symlink("/etc/zulip/settings.py", os.path.join(deploy_path, "zproject/prod_settings.py"))

# Now we should have an environment set up where we can run our tools;
# first, creating the production venv.
subprocess.check_call(
    [os.path.join(deploy_path, "scripts", "lib", "create-production-venv"), deploy_path]
)

# Check to make sure that this upgrade is not actually a database
# downgrade.
if not args.skip_downgrade_check:
    subprocess.check_call(
        [os.path.join(deploy_path, "scripts", "lib", "check-database-compatibility")],
        preexec_fn=su_to_zulip,
    )

# Make sure the right version of node is installed
subprocess.check_call([os.path.join(deploy_path, "scripts", "lib", "install-node")])

# Generate any new secrets that were added in the new version required.
# TODO: Do caching to only run this when it has changed.
subprocess.check_call(
    [os.path.join(deploy_path, "scripts", "setup", "generate_secrets.py"), "--production"]
)

# Adjust Puppet class names for the manifest renames in the 4.0 release;
# zulip::base is dropped entirely, since every profile now includes it.
class_renames = {
    "zulip::app_frontend": "zulip::profile::app_frontend",
    "zulip::dockervoyager": "zulip::profile::docker",
    "zulip::memcached": "zulip::profile::memcached",
    "zulip::postgres_appdb_tuned": "zulip::profile::postgresql",
    "zulip::postgres_backups": "zulip::postgresql_backups",
    "zulip::rabbit": "zulip::profile::rabbitmq",
    "zulip::voyager": "zulip::profile::standalone",
}
classes = re.split(r"\s*,\s*", get_config(config_file, "machine", "puppet_classes", ""))
new_classes = [class_renames.get(c, c) for c in classes if c != "zulip::base"]
if classes != new_classes:
    # Rewriting puppet_classes is a system-wide change, which --skip-restart
    # promises not to make.
    if args.skip_restart:
        logging.error("Would need to adjust puppet classes -- aborting!")
        sys.exit(1)
    logging.info("Adjusting Puppet classes for renames...")
    subprocess.check_call(
        [
            "crudini",
            "--set",
            "/etc/zulip/zulip.conf",
            "machine",
            "puppet_classes",
            ", ".join(new_classes),
        ]
    )

# Unpleasant migration: Remove any legacy deployed copies of
# images-google-64 from before we renamed that emojiset to
# "googleblob":
emoji_path = "/home/zulip/prod-static/generated/emoji/images-google-64/1f32d.png"
if os.path.exists(emoji_path):
    with open(emoji_path, "rb") as f:
        emoji_data = f.read()
    emoji_sha = hashlib.sha1(emoji_data).hexdigest()
    # Only delete the tree if this sentinel file matches the known legacy
    # content, to avoid removing a current-format emoji set.
    if emoji_sha == "47033121dc20b376e0f86f4916969872ad22a293":
        if args.skip_restart:
            logging.error("Would need to delete images-google-64 -- aborting!")
            sys.exit(1)
        shutil.rmtree("/home/zulip/prod-static/generated/emoji/images-google-64")

# And then, building/installing the static assets.
if args.ignore_static_assets:
    # For the OS version upgrade use case, the static assets are
    # already in place, and we don't need to do anything.  Further,
    # neither of the options below will work for all installations,
    # because if we installed from Git, `prod-static/serve` may be
    # empty so we can't do the non-Git thing, whereas if we installed
    # from a tarball, we won't have a `tools/` directory and thus
    # cannot run `tools/update-prod-static`.
    pass
elif args.from_git:
    # Because `upgrade-zulip-from-git` needs to build static assets, it
    # is at risk of being OOM killed on systems with limited free RAM.
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    mem_gib = mem_bytes / (1024.0**3)  # e.g. 3.74

    # Ideally, we'd have 2 thresholds here, depending on whether the
    # system is running queue workers multithreaded or multiprocess.
    # See puppet/zulip/manifests/app_frontend_base.pp for background.
    if mem_gib < 4.2:
        logging.info("Shutting down server to ensure sufficient free RAM for webpack.")
        shutdown_server(fill_caches=False)

    # Note: The fact that this is before we apply Puppet changes means
    # that we don't support adding new Puppet dependencies of
    # update-prod-static with the Git upgrade process.  But it'll fail
    # safely; this seems like a worthwhile tradeoff to minimize downtime.
    logging.info("Building static assets...")
    try:
        subprocess.check_call(["./tools/update-prod-static"], preexec_fn=su_to_zulip)
    except subprocess.CalledProcessError:
        logging.error("Failed to build static assets.")
        if IS_SERVER_UP:
            logging.error("Usually the cause is insufficient free RAM to run webpack.")
            logging.error("Try stopping the Zulip server (scripts/stop-server) and trying again.")
        sys.exit(1)

else:
    # Since this doesn't do any actual work, it's likely safe to have
    # this run before we apply Puppet changes (saving a bit of downtime).
    logging.info("Installing static assets...")
    # cp -rT copies the *contents* of prod-static/serve into the target
    # directory, overwriting files in place.
    subprocess.check_call(
        ["cp", "-rT", os.path.join(deploy_path, "prod-static/serve"), "/home/zulip/prod-static"],
        preexec_fn=su_to_zulip,
    )

# Perform system checks -- including database checks, so we don't need
# to do them when we do migrations, below.
if not args.skip_checks:
    subprocess.check_call(["./manage.py", "check", "--database", "default"], preexec_fn=su_to_zulip)

# Our next optimization is to check whether any migrations are needed
# before we start the critical section of the restart.  This saves
# about 1s of downtime in a no-op upgrade.  We omit this check if we
# already stopped the server above, due to low memory.
if not IS_SERVER_UP:
    migrations_needed = True
elif not args.skip_migrations:
    logging.info("Checking for needed migrations")
    migrations_output = subprocess.check_output(
        ["./manage.py", "showmigrations", "--skip-checks"], preexec_fn=su_to_zulip, text=True
    )
    # showmigrations marks unapplied migrations with "[ ]" (applied ones
    # get "[X]").
    for ln in migrations_output.split("\n"):
        line_str = ln.strip()
        if line_str.startswith("[ ]"):
            migrations_needed = True

if args.skip_restart and migrations_needed:
    logging.error("Would need to apply migrations -- aborting!")
    sys.exit(1)

# Install hooks before we check for puppet changes, so we skip an
# unnecessary stop/start cycle if hook changes are the only ones.
logging.info("Installing hooks")
subprocess.check_call(
    ["./scripts/zulip-puppet-apply", "--tags", "hooks", "--force"],
)

# If we are planning on running puppet, we can pre-run it in --noop
# mode and see if it will actually make any changes; if not, we can
# skip it during the upgrade.  We omit this check if the server is
# already stopped, since it's not better than just pressing on.
has_puppet_changes = True
if not args.skip_puppet and IS_SERVER_UP:
    logging.info("Pre-checking for puppet changes...")
    # Exit codes: 0 means no changes would be made, 2 means puppet itself
    # errored; any other nonzero code means changes are pending.
    try_puppet = subprocess.run(
        ["./scripts/zulip-puppet-apply", "--noop", "--force"],
        stdout=subprocess.DEVNULL,
        check=False,
    )
    if try_puppet.returncode == 0:
        if args.skip_restart:
            logging.info("Verified no Puppet changes are necessary.")
        else:
            logging.info("No puppet changes found, skipping!")
        args.skip_puppet = True
        has_puppet_changes = False
    elif try_puppet.returncode == 2:
        logging.error("Puppet error -- aborting!")
        sys.exit(1)
    elif args.skip_restart:
        logging.error("Would need to apply puppet changes -- aborting!")
        sys.exit(1)


if args.skip_restart:
    logging.info("Successfully configured in %s!", deploy_path)
else:
    # NOTE: Here begins the most likely critical period, where we may be
    # shutting down the server; we should strive to minimize the number of
    # steps that happen between here and the "Restarting Zulip" line
    # below.

    subprocess.check_call(["./scripts/lib/run_hooks.py", "pre-deploy"])

    if rabbitmq_dist_listen:
        shutdown_server()
        logging.info("Shutting down rabbitmq to adjust its ports...")
        subprocess.check_call(["/usr/sbin/service", "rabbitmq-server", "stop"])

    if cookie_size is not None and cookie_size == 20:
        # Checking for a 20-character cookie is used as a signal that it
        # was generated by Erlang's insecure randomizer, which only
        # provides between 20 and 36 bits of entropy; were it 20
        # characters long by a good randomizer, it would be 96 bits and
        # more than sufficient.  We generate, using good randomness, a
        # 255-character cookie, the max allowed length.
        shutdown_server()
        logging.info("Generating a secure erlang cookie...")
        subprocess.check_call(["./scripts/setup/generate-rabbitmq-cookie"])

    if not args.skip_puppet:
        # Puppet may adjust random services; to minimize risk of issues
        # due to inconsistent state, we shut down the server first.
        shutdown_server()
        logging.info("Applying Puppet changes...")
        subprocess.check_call(["./scripts/zulip-puppet-apply", "--force"])
        subprocess.check_call(["apt-get", "-y", "--with-new-pkgs", "upgrade"])
        # Puppet may have reloaded supervisor, and in so doing started
        # services; mark as potentially needing to stop the server.
        IS_SERVER_UP = True

    if migrations_needed:
        # Database migrations assume that they run on a database in
        # quiesced state.
        shutdown_server()
        logging.info("Applying database migrations...")
        subprocess.check_call(
            ["./manage.py", "migrate", "--noinput", "--skip-checks"], preexec_fn=su_to_zulip
        )

    logging.info("Restarting Zulip...")
    start_args = ["--skip-checks", "--skip-client-reloads"]
    if not HAS_FILLED_CACHES:
        start_args.append("--fill-cache")
    if IS_SERVER_UP:
        if args.less_graceful:
            start_args.append("--less-graceful")
        subprocess.check_call(["./scripts/restart-server", *start_args], preexec_fn=su_to_zulip)
    else:
        subprocess.check_call(["./scripts/start-server", *start_args], preexec_fn=su_to_zulip)

    logging.info("Upgrade complete!")

    subprocess.check_call(["./scripts/lib/run_hooks.py", "post-deploy"])

    if not args.skip_client_reloads:
        subprocess.check_call(["./scripts/reload-clients"], preexec_fn=su_to_zulip)

if args.audit_fts_indexes:
    logging.info("Correcting full-text search indexes for updated dictionary files")
    logging.info("This may take a while but the server should work while it runs.")
    subprocess.check_call(
        ["./manage.py", "audit_fts_indexes", "--automated", "--skip-checks"], preexec_fn=su_to_zulip
    )

if not args.skip_purge_old_deployments:
    logging.info("Purging old deployments...")
    subprocess.check_call(["./scripts/purge-old-deployments"])
else:
    logging.info("Skipping purging old deployments.")

if args.skip_puppet and has_puppet_changes:
    logging.info("Showing un-applied Puppet changes:")
    subprocess.check_call(["./scripts/zulip-puppet-apply", "--noop", "--show_diff"])
